
# Load Libraries
library(tidyr)
library(dplyr)
library(ggplot2)
library(scales)
library(ggsignif)
library(hrbrthemes)
library(extrafont)
library(ggthemes)
library(grid)

loadfonts()


## Stated Factors
# Read the survey export. Its `Factors` column holds a comma-separated list of
# factor labels in each row.
mydata.factors <- read.csv(
  file = "data/study_1/responses_factors.csv",
  header = TRUE,
  sep = ","
)

# Long format: split `Factors` on "," and give every label its own row.
mydata2.factors <- mydata.factors %>%
  mutate(Factors = strsplit(as.character(Factors), ",")) %>%
  unnest(Factors)

# Indicator column of 1s. It is named when it is created, so nothing depends
# on the new column happening to land at position 3.
mydata2.factors <- cbind(mydata2.factors, ones = 1)

# Wide table with one column per factor label; NA cells are replaced by 0.
mydata3.factors <- mydata2.factors %>% spread(Factors, ones)
mydata3.factors[is.na(mydata3.factors)] <- 0

# Frequency of each factor label, expressed as a proportion.
# NOTE(review): 391 is presumably the number of survey respondents -- confirm
# it still matches the data file.
n_respondents <- 391
plotdata.factor <- mydata2.factors[, c("Factors", "ones")] %>%
  group_by(Factors) %>%
  summarise(Freq = sum(ones) / n_respondents)

plotdata.factor$Factors <- gsub(
  " \\(specify below\\)",
  "",
  plotdata.factor$Factors
)

# Factor levels in descending order of Freq. order() handles tied frequencies
# on its own, so no value has to be nudged by a tiny epsilon to keep the
# levels unique.
plotdata.factor$Factors <- factor(
  plotdata.factor$Factors,
  levels = unique(
    plotdata.factor$Factors[order(plotdata.factor$Freq, decreasing = TRUE)]
  )
)

# Drop the "Others" row. A logical mask is used on purpose: negative indexing
# with which() would select zero rows if "Others" were absent from the data.
plotdata.factor <- plotdata.factor[!(plotdata.factor$Factors %in% "Others"), ]

## Actual Factors
# Per-image responses; the columns used below are `Img` and `Esc`.
mydata.img <- read.csv(
  file = "data/study_1/responses_per_image.csv",
  header = TRUE,
  sep = ","
)

# Per-image feature table, keyed by `file_id`.
features <- read.csv(
  file = "data/features_labels_wide.csv",
  header = TRUE,
  sep = ","
)
truth <- features[, c("file_id", "escaped")]
truth$file_id <- as.character(truth$file_id)

# merge() defaults to an inner join, so only responses whose `Img` has a
# matching `file_id` in `truth` are kept.
mydata.img.truth <- merge(
  mydata.img,
  truth,
  by.x = "Img",
  by.y = "file_id"
)

# Attach all feature columns to the (Img, Esc) pairs.
mydata.img.features <- merge(
  mydata.img.truth[, c("Img", "Esc")],
  features,
  by.x = "Img",
  by.y = "file_id"
)

# Recode the response: "Y" becomes 1, any other non-missing value becomes 0.
mydata.img.features$Esc <- ifelse(mydata.img.features$Esc == "Y", 1, 0)

# Feature columns to correlate with the 1/0 `Esc` indicator. The values are
# attached to plotdata.factor by row position below, so the order of this
# vector matters.
x_atts <- c(
  "age_std",
  "race_entropy",
  "gender_entropy",
  "pos_mst_avg",
  "num_ppl",
  "smile_coef_mean",
  "p_glass",
  "age_mean",
  "smile_coef_std"
)

# Fail early with a readable message if a feature column is missing.
missing_atts <- setdiff(x_atts, names(mydata.img.features))
if (length(missing_atts) > 0) {
  stop(
    "Feature column(s) not found in mydata.img.features: ",
    paste(missing_atts, collapse = ", "),
    call. = FALSE
  )
}

# One-column matrix of |Spearman's rho|, sized from x_atts instead of a
# hard-coded row count.
actual <- matrix(NA_real_, nrow = length(x_atts), ncol = 1)
for (i in seq_along(x_atts)) {
  cor_res <- cor.test(
    mydata.img.features$Esc,
    mydata.img.features[[x_atts[i]]],
    method = "spearman",
    exact = FALSE
  )
  actual[i, 1] <- abs(cor_res$estimate)
}

actual

# The assignment is positional, so the two tables must have the same number
# of rows.
if (nrow(plotdata.factor) != length(x_atts)) {
  stop(
    "plotdata.factor has ", nrow(plotdata.factor), " rows but ",
    length(x_atts), " correlations were computed",
    call. = FALSE
  )
}
plotdata.factor$Actual <- actual

# Replace the factor names with display labels. The labels are assigned by
# row position, in the same order as the rows currently appear.
plotdata.factor[["Factors"]] <- c(
  "Age (STD)", "Ethnic Diversity", "Gender Diversity",
  "Distance between people", "Number of people",
  "Smiling Index (Mean)", "Fraction of people with glasses",
  "Age (Mean)", "Smiling Index (STD)"
)

# Permute the rows into the order in which they are drawn (first row on top).
plotdata.factor <- plotdata.factor[
  c(5, 4, 3, 2, 8, 1, 6, 9, 7),
]

## Plotting
# Open the PDF device that all three panels are drawn onto.
fig_out_path <- "plots/pdfs/prediction_factors.pdf"
pdf(
  file = fig_out_path,
  family = "Roboto Condensed",
  width = 9,
  height = 3.5
)

# Three panels side by side; square line ends (lend = 1).
par(
  mfrow = c(1, 3),
  mai = c(0.8, 0, 0.4, 0.1),
  lend = 1
)

# Shared drawing parameters
labels                <- plotdata.factor$Factors
points_cex            <- 2.9
line_width            <- 1.5
main_line_width       <- 14
ticks_size            <- 0.02
ticks_lwd             <- 1
top_bottom_rect_space <- 0.2

# Panel showing the stated proportions
p1_vals  <- plotdata.factor$Freq
p1_x_min <- -0.1
p1_x_max <- 1
p1_title <- "Human (Stated)"
p1_x_lab <- "Proportion of Respondents"
p1_col   <- "#01665e"

# Panel showing the absolute Spearman correlations
p2_vals  <- plotdata.factor$Actual
p2_x_min <- -0.05
p2_x_max <- 0.5
p2_title <- "Human (Actual)"
# plotmath label; the plain-text equivalent would be "|Spearman's rho|"
p2_x_lab <- expression(paste("| Spearman's ", rho, " |"))
p2_col   <- "#c51b7d"

# Number of rows drawn in every panel
n <- length(p1_vals)

# Panel 1: an empty, axis-free canvas that only carries the row labels.
# Its y range matches the other panels so the labels line up with their rows.
plot(
  NULL,
  NULL,
  xlim = c(2, 5),
  ylim = c(1, n) + c(-1, 1) * top_bottom_rect_space,
  axes = FALSE,
  xlab = "",
  ylab = ""
)
# Bold labels (font = 2) placed to the right of x = 3 (pos = 4), first label
# on the top row.
text(
  x = 3,
  y = n:1,
  labels = labels,
  pos = 4,
  cex = 1.2,
  font = 2
)

# Draw one horizontal "lollipop" panel on the current graphics device.
#
# Arguments:
#   vals      values to draw, one per row; the first value is on the top row
#   x_limits  c(min, max) of the x axis
#   title     panel title
#   x_lab     x-axis label (character or plotmath expression)
#   colour    colour of the stems and the outer ring of each point
#   y_pad     extra space above the top row and below the bottom row
#   point_cex size of the outer point; the white inner point is 70% of it
#   stem_lwd  line width of the stems
#   axis_lwd  line width of the vertical reference line at x = 0
#
# The styling defaults read the shared parameters defined earlier in this
# script. Called for its drawing side effect only.
draw_lollipop_panel <- function(vals, x_limits, title, x_lab, colour,
                                y_pad = top_bottom_rect_space,
                                point_cex = points_cex,
                                stem_lwd = main_line_width,
                                axis_lwd = line_width) {
  vals <- as.numeric(vals)  # also flattens a one-column matrix
  n_rows <- length(vals)
  row_y <- rev(seq_len(n_rows))

  # Empty frame without y-axis ticks
  plot(
    NULL,
    NULL,
    xlim = x_limits,
    ylim = c(1 - y_pad, n_rows + y_pad),
    yaxt = "n",
    main = title,
    cex.main = 1.7,
    xlab = x_lab,
    ylab = "",
    cex.lab = 1.5
  )

  # Light grey band behind every odd row. The band spans the full plot
  # region as reported by par("usr"), so it does not depend on the sign of
  # the axis limits.
  usr <- par("usr")
  odd_rows <- seq_len(n_rows)[c(TRUE, FALSE)]
  for (row in odd_rows) {
    rect(
      usr[1],
      row - 0.5,
      usr[2],
      row + 0.5,
      col = rgb(0, 0, 0, alpha = 0.1),
      border = NA
    )
  }

  # Semi-transparent stems from 0 to each value
  segments(
    x0 = rep(0, n_rows),
    y0 = row_y,
    x1 = vals,
    y1 = row_y,
    lwd = stem_lwd,
    col = alpha(colour, 0.6)
  )

  # Ring marker: a filled point with a smaller white point drawn on top
  points(vals, row_y, pch = 16, cex = point_cex, col = colour)
  points(vals, row_y, pch = 16, cex = point_cex * 0.7, col = "white")

  # Reference line at zero
  abline(v = 0, lwd = axis_lwd)

  invisible(NULL)
}

# panel 2: stated proportions
draw_lollipop_panel(
  vals = p1_vals,
  x_limits = c(p1_x_min, p1_x_max),
  title = p1_title,
  x_lab = p1_x_lab,
  colour = p1_col
)

# panel 3: absolute Spearman correlations
draw_lollipop_panel(
  vals = p2_vals,
  x_limits = c(p2_x_min, p2_x_max),
  title = p2_title,
  x_lab = p2_x_lab,
  colour = p2_col
)

# Close the PDF device, then embed the fonts into that same file; the output
# path equals the input path, so the PDF is rewritten in place.
dev.off()
embed_fonts(file = fig_out_path, outfile = fig_out_path)

# END